Topological Cluster Statistics (TCS)¶


Notebook 18: additional power assessments¶


This notebook contains scripts that perform various power assessments, mainly included in the supplementary materials of the manuscript.


Packages and basic functions¶


Loading required packages

In [1]:
import os
import itertools
import numpy as np
import pandas as pd
import nibabel as nib
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
import scipy.sparse as sparse
from sklearn import metrics
from distinctipy import distinctipy
from tqdm.notebook import tqdm

Basic functions

In [2]:
def ensure_dir(file_name):
    """Create the parent directory of ``file_name`` (if any) and return the path.

    Convenience wrapper so a target path can be prepared inline, e.g.
    ``write_np(arr, ensure_dir(path))``.
    """
    parent = os.path.dirname(file_name)
    # os.path.dirname('') is returned for a bare filename; os.makedirs('')
    # would raise FileNotFoundError, so only create a directory when one exists.
    if parent:
        os.makedirs(parent, exist_ok=True)
    return file_name


def write_np(np_obj, file_path):
    """Serialize ``np_obj`` to ``file_path`` in numpy .npy format."""
    # Pass an open file handle (not the path) so numpy does not append
    # a '.npy' suffix to extensionless paths.
    with open(file_path, 'wb') as fh:
        np.save(fh, np_obj)


def load_np(file_path):
    """Deserialize and return a numpy array stored at ``file_path`` (.npy format)."""
    with open(file_path, 'rb') as fh:
        return np.load(fh)

Plot settings (latex is used for better plotting)

In [3]:
# Apply seaborn defaults with a dark-grid background for all figures.
sns.set()
sns.set_style("darkgrid")

# Render figures inline at retina (2x) resolution.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
In [4]:
# Use LaTeX for text rendering (requires a local TeX installation with
# the mathtools and sfmath packages).
plt.rc('text', usetex=True)
plt.rc('text.latex', preamble=r'\usepackage{mathtools} \usepackage{sfmath}')

# Global font sizes for tick and axis labels.
plt.rc('xtick', labelsize=20)
plt.rc('ytick', labelsize=20)
plt.rc('axes', labelsize=24)

# High-resolution figures suitable for publication.
plt.rc('figure', dpi=500)

Loading the ground truth¶


The ground truth stored in notebook 2 is loaded here.

In [5]:
# List of all tasks and the cope number related to each selected contrast
# (the chosen contrast is noted inline next to each entry).
tasks = {
    'EMOTION': '3',  # faces - shapes
    'GAMBLING': '6',  # reward - punish
    'RELATIONAL': '4',  # rel - match
    'SOCIAL': '6',  # tom - random
    'WM': '20',  # face - avg
}
In [6]:
# Load the per-task ground-truth effect size (Cohen's d) maps saved by notebook 2.
ground_truth_effect = {}
# Base directory where files are stored at
# NOTE(review): hardcoded absolute path — prefer a configurable data directory.
base_dir='/data/netapp01/work/sina/structural_clustering/PALM_revision_1'

for task in tqdm(tasks, desc="Tasks loop", leave=True):
    ground_truth_effect[task] = load_np(
        '{}/ground_truth/cohen_d_{}_cope{}.dscalar.npy'.format(base_dir, task, tasks[task]),
    )
Tasks loop:   0%|          | 0/5 [00:00<?, ?it/s]

Loading PALM results¶


PALM results stored in notebook 1 are loaded here.

In [7]:
%%time

# Number of random repetitions
repetitions = 500
# Different sample sizes tested
sample_sizes = [10, 20, 40, 80, 160, 320]
# Different cluster defining thresholds
cdts = [3.3, 2.8, 2.6, 2.0, 1.6]
# Number of brainordinates in a cifti file
Nv = 91282
# Base directory where files are stored at
# NOTE(review): hardcoded absolute path — prefer a configurable data directory.
base_dir='/data/netapp01/work/sina/structural_clustering/PALM_revision_1'

# Store loaded results in nested python dictionaries, keyed as
# loaded_maps[map_type][task][f'N={sample_size},CDT={cdt}'].
loaded_maps = {}
loaded_maps['uncorrected_tstat'] = {}
loaded_maps['spatial_cluster_corrected_tstat'] = {}
loaded_maps['topological_cluster_corrected_tstat'] = {}

# Only use the z=3.3, p=0.001 for the main analyses reported here
# cdt = 3.3
# sample_size = 40
for task in tqdm(tasks, desc="Tasks loop", leave=True):
    loaded_maps['uncorrected_tstat'][task] = {}
    loaded_maps['spatial_cluster_corrected_tstat'][task] = {}
    loaded_maps['topological_cluster_corrected_tstat'][task] = {}
    for sample_size in tqdm(sample_sizes, desc="Sample size loop", leave=False):
        for cdt in tqdm(cdts, desc="CDT loop", leave=False):
            loaded_maps['uncorrected_tstat'][task][f'N={sample_size},CDT={cdt}'] = load_np(
                f'{base_dir}/summary/uncorrected_tstat_{task}_{sample_size}_samples_{cdt}_CDT.npy',
            )
            # ensure_dir creates the summary directory if missing before the
            # file is read (a no-op when it already exists).
            loaded_maps['spatial_cluster_corrected_tstat'][task][f'N={sample_size},CDT={cdt}'] = load_np(
                ensure_dir(f'{base_dir}/summary/spatial_cluster_corrected_tstat_{task}_{sample_size}_samples_{cdt}_CDT.npy'),
            )
            loaded_maps['topological_cluster_corrected_tstat'][task][f'N={sample_size},CDT={cdt}'] = load_np(
                ensure_dir(f'{base_dir}/summary/topological_cluster_corrected_tstat_{task}_{sample_size}_samples_{cdt}_CDT.npy'),
            )
Tasks loop:   0%|          | 0/5 [00:00<?, ?it/s]
Sample size loop:   0%|          | 0/6 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
Sample size loop:   0%|          | 0/6 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
Sample size loop:   0%|          | 0/6 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
Sample size loop:   0%|          | 0/6 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
Sample size loop:   0%|          | 0/6 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CPU times: user 1.19 s, sys: 3min 24s, total: 3min 25s
Wall time: 31min 25s

Additional power assessments¶

In [8]:
def get_cohen_d_from_bonferroni_corrected_p_value(p, sample_size, multiple_comparisons):
    """Invert a Bonferroni-corrected, two-tailed one-sample t-test p-value
    into the corresponding Cohen's d.

    ``p`` is divided by ``multiple_comparisons`` to recover the per-test
    threshold, converted to a critical t value at ``sample_size - 1`` degrees
    of freedom, then scaled to an effect size via d = t / sqrt(n).
    """
    per_test_p = p / multiple_comparisons
    df = sample_size - 1
    # Two-tailed: each tail carries half the per-test alpha.
    critical_t = stats.t.isf(per_test_p / 2, df)
    return critical_t / np.sqrt(sample_size)

def get_bonferroni_corrected_p_value_from_cohen_d(d, sample_size, multiple_comparisons):
    """Convert a Cohen's d into a Bonferroni-corrected, two-tailed one-sample
    t-test p-value.

    d is scaled to a t statistic via t = d * sqrt(n), its two-tailed survival
    probability is computed at ``sample_size - 1`` degrees of freedom, and the
    result is multiplied by ``multiple_comparisons``. Works element-wise when
    ``d`` is an array.
    """
    t_stat = d * np.sqrt(sample_size)
    unc_p = stats.t.sf(np.abs(t_stat), sample_size-1)*2
    # Bonferroni fix: unc_p * m can exceed 1 for weak effects; a p-value is
    # capped at 1 by definition (np.minimum keeps array inputs working).
    p = np.minimum(1.0, unc_p * multiple_comparisons)
    return p

def get_uncorrected_p_value_from_cohen_d(d, sample_size, multiple_comparisons):
    """Convert a Cohen's d into an uncorrected, two-tailed one-sample t-test
    p-value.

    ``multiple_comparisons`` is unused; it is kept so the signature matches
    the Bonferroni-corrected variant and the two are interchangeable.
    """
    t_stat = d * np.sqrt(sample_size)
    return stats.t.sf(np.abs(t_stat), sample_size - 1) * 2

def compute_normalized_partial_area_under_curve(fpr, tpr, lowest_fpr, highest_fpr):
    """Partial AUC over the open FPR window (lowest_fpr, highest_fpr),
    normalized by the FPR span so the result is comparable across windows.

    NOTE(review): fpr and tpr are sorted independently after masking, which
    assumes the ROC points are jointly monotonic inside the window — confirm.
    """
    window = (fpr > lowest_fpr) & (fpr < highest_fpr)
    x = np.sort(fpr[window])
    y = np.sort(tpr[window])
    return metrics.auc(x, y) / (x.max() - x.min())

def compute_mean_informedness(fpr, tpr, lowest_fpr, highest_fpr):
    """Average informedness (TPR - FPR, i.e. Youden's J) over the open FPR
    window (lowest_fpr, highest_fpr), normalized by the FPR span.

    NOTE(review): fpr and tpr are sorted independently after masking, which
    assumes the ROC points are jointly monotonic inside the window — confirm.
    """
    window = (fpr > lowest_fpr) & (fpr < highest_fpr)
    x = np.sort(fpr[window])
    y = np.sort(tpr[window])
    return metrics.auc(x, y - x) / (x.max() - x.min())

def compute_normalized_concordant_partial_area_under_curve(fpr, tpr, lowest_fpr, highest_fpr):
    """Concordant partial AUC: mean of (a) the normalized partial AUC over
    the FPR window and (b) the normalized area of specificity (1 - FPR)
    against TPR over the same points.

    NOTE(review): fpr and tpr are sorted independently after masking, which
    assumes the ROC points are jointly monotonic inside the window — confirm.
    """
    window = (fpr > lowest_fpr) & (fpr < highest_fpr)
    x = np.sort(fpr[window])
    y = np.sort(tpr[window])
    pauc = metrics.auc(x, y) / (x.max() - x.min())
    concordant = metrics.auc(y, 1 - x) / (y.max() - y.min())
    return (pauc + concordant) / 2
In [9]:
def get_all_cluster_extents(cortical_effect, effect_threshold, cluster_connectivity, magnitude=True):
    """Find connected suprathreshold clusters on a sparse connectivity graph.

    Vertices with effect above ``effect_threshold`` (in absolute value when
    ``magnitude`` is True, signed otherwise) keep their edges; everything else
    is disconnected. Connected components of size 1 are collapsed into a
    shared background bucket.

    Returns:
        (cluster_sizes, cluster_labels): ``cluster_sizes`` maps consecutive
        cluster ids to vertex counts (ids start at 1 when a background bucket
        exists, since relabeling follows sorted label order); ``cluster_labels``
        is a per-vertex list, with all background/singleton vertices sharing
        the lowest id.
    """
    # Diagonal mask selecting vertices that pass the effect threshold.
    if magnitude:
        passing = np.abs(cortical_effect) > effect_threshold
    else:
        passing = cortical_effect > effect_threshold
    mask = sparse.diags(passing.astype(float), 0)

    # Pre- and post-multiplying zeroes out every edge touching a
    # sub-threshold vertex, keeping suprathreshold-to-suprathreshold edges.
    masked_graph = mask.dot(cluster_connectivity.dot(mask)).tocsr()

    _, raw_labels = sparse.csgraph.connected_components(
        csgraph=masked_graph, directed=False, return_labels=True,
    )

    # Tally component sizes.
    unique_labels, label_sizes = np.unique(raw_labels, return_counts=True)
    sizes = dict(zip(unique_labels, label_sizes))

    # Collapse singleton components (isolated/masked vertices) into -1.
    collapse = {lbl: (lbl if sizes[lbl] > 1 else -1) for lbl in sizes}
    collapsed_labels = [collapse[lbl] for lbl in raw_labels]
    cluster_sizes = {lbl: n for lbl, n in sizes.items() if n > 1}

    # Renumber remaining labels to consecutive ids in sorted order; the -1
    # background bucket, when present, becomes id 0.
    # final_label_replace[-1] = -1
    renumber = {lbl: i for (i, lbl) in enumerate(np.unique(collapsed_labels))}
    final_sizes = {renumber[lbl]: cluster_sizes[lbl] for lbl in cluster_sizes}
    final_labels = [renumber[lbl] for lbl in collapsed_labels]

    return (final_sizes, final_labels)
In [10]:
%%time

# number of repetitions for every sample size (repeated subsampling)
repetitions = 500
# number of brainordinates in a cifti file
Nv = 91282

# -log10 threshold corresponding to a corrected p = 0.05
logp_threshold = -np.log10(0.05)
# ground_truth_effect_thresholds = np.linspace(0.01,0.2,)
# a stringent bonferroni correction for the putative ground truth
ground_truth_effect_threshold = get_cohen_d_from_bonferroni_corrected_p_value(0.05, 1000, 91282)


# bookmaker informedness (Youden's J = TPR - FPR)
BM = {}
TPR = {}
FPR = {}

for task in tqdm(tasks, desc="Tasks loop", leave=True):
    BM[task] = {}
    TPR[task] = {}
    FPR[task] = {}
    for sample_size in tqdm(sample_sizes, desc="Sample size loop", leave=False):
        for cdt in tqdm(cdts, desc="CDT loop", leave=False):
            BM[task][f'N={sample_size},CDT={cdt}'] = {}
            TPR[task][f'N={sample_size},CDT={cdt}'] = {}
            FPR[task][f'N={sample_size},CDT={cdt}'] = {}
            BM[task][f'N={sample_size},CDT={cdt}']['spatial'] = []
            BM[task][f'N={sample_size},CDT={cdt}']['topological'] = []
            TPR[task][f'N={sample_size},CDT={cdt}']['spatial'] = []
            TPR[task][f'N={sample_size},CDT={cdt}']['topological'] = []
            FPR[task][f'N={sample_size},CDT={cdt}']['spatial'] = []
            FPR[task][f'N={sample_size},CDT={cdt}']['topological'] = []

            # drop repetitions containing any NaN values
            t_stats = loaded_maps['uncorrected_tstat'][task][f'N={sample_size},CDT={cdt}']
            t_stats = t_stats[~np.isnan(t_stats).any(axis=1)]

            spatial_cluster_logps = loaded_maps['spatial_cluster_corrected_tstat'][task][f'N={sample_size},CDT={cdt}']
            spatial_cluster_logps = spatial_cluster_logps[~np.isnan(spatial_cluster_logps).any(axis=1)]
            # statistical predictions: significant vertices split by t-stat sign
            spatial_increased_activation = (spatial_cluster_logps>logp_threshold) & (t_stats>0)
            spatial_decreased_activation = (spatial_cluster_logps>logp_threshold) & (t_stats<0)
            spatial_no_change_in_activation = (spatial_cluster_logps<logp_threshold)

            topological_cluster_logps = loaded_maps['topological_cluster_corrected_tstat'][task][f'N={sample_size},CDT={cdt}']
            topological_cluster_logps = topological_cluster_logps[~np.isnan(topological_cluster_logps).any(axis=1)]
            # statistical predictions: significant vertices split by t-stat sign
            topological_increased_activation = (topological_cluster_logps>logp_threshold) & (t_stats>0)
            topological_decreased_activation = (topological_cluster_logps>logp_threshold) & (t_stats<0)
            topological_no_change_in_activation = (topological_cluster_logps<logp_threshold)

            # for ground_truth_effect_threshold in tqdm(ground_truth_effect_thresholds, desc="Thresholds loop", leave=False):
            # ground truth conditions (signed, thresholded Cohen's d)
            true_increased_activation = ground_truth_effect[task] > ground_truth_effect_threshold
            true_decreased_activation = ground_truth_effect[task] < -ground_truth_effect_threshold
            true_no_change_in_activation = np.abs(ground_truth_effect[task]) < ground_truth_effect_threshold

            # Confusion-matrix counts per repetition (np.tile broadcasts the
            # single ground-truth map across repetitions; .sum(1) counts vertices).
            spatial_true_positive_increased_activation = np.multiply(spatial_increased_activation, np.tile(true_increased_activation,(spatial_cluster_logps.shape[0],1))).sum(1)
            spatial_true_positive_decreased_activation = np.multiply(spatial_decreased_activation, np.tile(true_decreased_activation,(spatial_cluster_logps.shape[0],1))).sum(1)
            spatial_true_positive = spatial_true_positive_increased_activation + spatial_true_positive_decreased_activation
            spatial_true_negative = np.multiply(spatial_no_change_in_activation, np.tile(true_no_change_in_activation,(spatial_cluster_logps.shape[0],1))).sum(1)
            spatial_false_negative_increased_activation = np.multiply(spatial_no_change_in_activation, np.tile(true_increased_activation,(spatial_cluster_logps.shape[0],1))).sum(1)
            spatial_false_negative_decreased_activation = np.multiply(spatial_no_change_in_activation, np.tile(true_decreased_activation,(spatial_cluster_logps.shape[0],1))).sum(1)
            spatial_false_negative = spatial_false_negative_increased_activation + spatial_false_negative_decreased_activation
            # false positives are whatever remains of the Nv brainordinates
            spatial_false_positive = Nv - (spatial_true_positive + spatial_true_negative + spatial_false_negative)
            spatial_true_positive_rate = np.divide(spatial_true_positive, (spatial_true_positive + spatial_false_negative))
            spatial_false_positive_rate = np.divide(spatial_false_positive, (spatial_false_positive + spatial_true_negative))
            spatial_BM = spatial_true_positive_rate - spatial_false_positive_rate
            TPR[task][f'N={sample_size},CDT={cdt}']['spatial'].append(spatial_true_positive_rate)
            FPR[task][f'N={sample_size},CDT={cdt}']['spatial'].append(spatial_false_positive_rate)
            BM[task][f'N={sample_size},CDT={cdt}']['spatial'].append(spatial_BM)

            # Same confusion-matrix computation for the topological (TCS) method.
            topological_true_positive_increased_activation = np.multiply(topological_increased_activation, np.tile(true_increased_activation,(topological_cluster_logps.shape[0],1))).sum(1)
            topological_true_positive_decreased_activation = np.multiply(topological_decreased_activation, np.tile(true_decreased_activation,(topological_cluster_logps.shape[0],1))).sum(1)
            topological_true_positive = topological_true_positive_increased_activation + topological_true_positive_decreased_activation
            topological_true_negative = np.multiply(topological_no_change_in_activation, np.tile(true_no_change_in_activation,(topological_cluster_logps.shape[0],1))).sum(1)
            topological_false_negative_increased_activation = np.multiply(topological_no_change_in_activation, np.tile(true_increased_activation,(topological_cluster_logps.shape[0],1))).sum(1)
            topological_false_negative_decreased_activation = np.multiply(topological_no_change_in_activation, np.tile(true_decreased_activation,(topological_cluster_logps.shape[0],1))).sum(1)
            topological_false_negative = topological_false_negative_increased_activation + topological_false_negative_decreased_activation
            topological_false_positive = Nv - (topological_true_positive + topological_true_negative + topological_false_negative)
            topological_true_positive_rate = np.divide(topological_true_positive, (topological_true_positive + topological_false_negative))
            topological_false_positive_rate = np.divide(topological_false_positive, (topological_false_positive + topological_true_negative))
            topological_BM = topological_true_positive_rate - topological_false_positive_rate
            TPR[task][f'N={sample_size},CDT={cdt}']['topological'].append(topological_true_positive_rate)
            FPR[task][f'N={sample_size},CDT={cdt}']['topological'].append(topological_false_positive_rate)
            BM[task][f'N={sample_size},CDT={cdt}']['topological'].append(topological_BM)

            # Each key's list holds exactly one per-repetition vector, so
            # unwrap element [0] into a plain array for downstream plotting.
            TPR[task][f'N={sample_size},CDT={cdt}']['spatial'] = np.array(TPR[task][f'N={sample_size},CDT={cdt}']['spatial'][0])
            FPR[task][f'N={sample_size},CDT={cdt}']['spatial'] = np.array(FPR[task][f'N={sample_size},CDT={cdt}']['spatial'][0])
            BM[task][f'N={sample_size},CDT={cdt}']['spatial'] = np.array(BM[task][f'N={sample_size},CDT={cdt}']['spatial'][0])

            TPR[task][f'N={sample_size},CDT={cdt}']['topological'] = np.array(TPR[task][f'N={sample_size},CDT={cdt}']['topological'][0])
            FPR[task][f'N={sample_size},CDT={cdt}']['topological'] = np.array(FPR[task][f'N={sample_size},CDT={cdt}']['topological'][0])
            BM[task][f'N={sample_size},CDT={cdt}']['topological'] = np.array(BM[task][f'N={sample_size},CDT={cdt}']['topological'][0])
Tasks loop:   0%|          | 0/5 [00:00<?, ?it/s]
Sample size loop:   0%|          | 0/6 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
Sample size loop:   0%|          | 0/6 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
Sample size loop:   0%|          | 0/6 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
Sample size loop:   0%|          | 0/6 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
Sample size loop:   0%|          | 0/6 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CDT loop:   0%|          | 0/5 [00:00<?, ?it/s]
CPU times: user 2min, sys: 1min 47s, total: 3min 48s
Wall time: 4min 6s

Sensitivity against specificity for different CDTs¶

In [12]:
# NOTE(review): stats is already imported in the first cell; re-imported here
# so this cell is self-contained.
import scipy.stats as stats
from statsmodels.stats.power import TTestPower
from matplotlib.patches import Patch

%config InlineBackend.figure_format = 'retina'

plt.rc('figure', dpi=300)

# NOTE(review): `analysis` appears unused in this cell — confirm before removing.
analysis = TTestPower()

# One row per sample size, one column per task.
fig = plt.figure(figsize=(30, 5*len(sample_sizes)),constrained_layout=True)
gs = fig.add_gridspec(len(sample_sizes), 5)

# Fixed colors for the two cluster-correction methods.
colors = {
    'topological': matplotlib.colors.to_rgb('#027dad'),
    'spatial': matplotlib.colors.to_rgb('#ad023b'),
}

logp_threshold = -np.log10(0.05)

for ci, task in enumerate(tasks):
    for ri, sample_size in enumerate(sample_sizes):
        ax = fig.add_subplot(gs[ri, ci])
        # Diagonal chance line (TPR = FPR).
        sns.lineplot(
            x=[0,1],
            y=[0,1],
            style=True,
            color=(0,0,0),
            legend=False,
            linewidth=2,
            dashes=[(2,2)],
            ax=ax,
        )
        nauc = []
        for method in ['spatial', 'topological',]:
            # Per-CDT arrays of per-repetition rates for this task/sample size.
            tpr = [TPR[task][f'N={sample_size},CDT={cdt}'][method] for cdt in cdts]
            fpr = [FPR[task][f'N={sample_size},CDT={cdt}'][method] for cdt in cdts]

            # Mean ROC point per CDT with 95% CI error bars across repetitions.
            ax.errorbar(
                x=[i.mean(0) for i in fpr],
                y=[i.mean(0) for i in tpr],
                xerr=[1.96 * stats.sem(i, 0) for i in fpr],
                yerr=[1.96 * stats.sem(i, 0) for i in tpr],
                color=colors[method],
                linewidth=3,
                elinewidth=2,
                capsize=3,
                capthick=2,
            )

            # Normalized partial AUC over the observed FPR range.
            nauc.append(
                compute_normalized_partial_area_under_curve(
                    np.array([i.mean(0) for i in fpr]),
                    np.array([i.mean(0) for i in tpr]),
                    min([i.mean(0) for i in fpr]),
                    max([i.mean(0) for i in fpr]),
                )
            )

        ax.set_ylim(-0.05,1.05)

        # Per-axis legend showing each method's partial AUC percentage.
        leg = ax.legend(
            handles=[Patch(facecolor=np.append(colors[m], 1),) for m in ['spatial', 'topological',]],
            labels=['${:.1f}\%$'.format(100*x) for x in nauc],
            loc='center right',
            ncol=1,
            fontsize=18,
            title="$AUC$",
            title_fontsize=24,
            labelspacing=1.,
            handlelength=1.,
        )
        leg.set_zorder(30)
        for patch in leg.get_patches():
            patch.set_width(20)
            patch.set_height(20)
            patch.set_y(-4)

        # Only label the bottom row's x axis.
        xlabel = ''
        if ri == len(sample_sizes) - 1:
            xlabel = 'FPR'
        ax.set_xlabel(xlabel, fontsize=40)

        ax.set_xlim(-0.01,0.315)

        # Only label the first column's y axis.
        ylabel = ''
        if ci == 0:
            # NOTE(review): .format(task) is a no-op ('TPR' has no placeholder).
            ylabel = 'TPR'.format(task)
        ax.set_ylabel(ylabel, fontsize=40)

        # Emulate seaborn darkgrid styling on this axis.
        ax.set_facecolor(np.array([234,234,242])/255)
        ax.grid(color=(0.99,0.99,0.99,), linewidth=3)
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        ax.spines['bottom'].set_visible(False)
        ax.spines['left'].set_visible(False)
        ax.tick_params(axis='both', colors=(0.5,0.5,0.5), labelcolor=(0,0,0), direction='out')

# Shared figure-level legend naming the two methods.
fig.legend(
    handles=[Patch(facecolor=np.append(colors[m], 1),) for m in ['spatial', 'topological',]],
    labels=['Spatial', 'TCS'],
    loc='lower center',
    bbox_to_anchor=(0.5, 1.0),
    ncol=2, fontsize=40,
)
plt.show()

Assessing the sensitivity at the level of clusters¶

To ensure both methods perform as expected, we evaluate whether the FWER (in the weak sense) is controlled at the level of clusters. To do so, we compute the likelihood of clusters containing true positives across all clusters that survive correction.

In [ ]:
# Load the sparse adjacency/connectivity graphs that define clusters.
local_adjacency = sparse.load_npz(f'{base_dir}/local_adjacency.npz')
structural_connectivity = sparse.load_npz(f'{base_dir}/unsmoothed_connectivity_mean_nonzero_freq_clnorm_truncated.npz')
# Hybrid graph: local spatial neighbors plus binarized structural edges.
hybrid_connectivity = local_adjacency + (structural_connectivity > 0).astype(float)

# The putative ground-truth threshold (stringent Bonferroni at N=1000 over
# all 91282 brainordinates) is loop-invariant, so compute it once here
# instead of once per repetition.
ground_truth_effect_threshold = get_cohen_d_from_bonferroni_corrected_p_value(0.05, 1000, 91282)
# threshold = get_cohen_d_from_bonferroni_corrected_p_value(0.05, 1000, 1)
threshold = ground_truth_effect_threshold

def _cluster_contains_true_effect(classes, cluster_labels):
    """True when any vertex inside a surviving cluster (label > 0) carries a
    true effect of the correct sign (positive `classes` value).

    Replaces the previous bare try/except: an empty selection (no surviving
    clusters) is handled explicitly, and unexpected errors now propagate
    instead of being silently recorded as False.
    """
    in_cluster = classes[np.array(cluster_labels) > 0]
    return bool(in_cluster.size) and bool(in_cluster.max() > 0)

spatial_effect_found = {}
topological_effect_found = {}
for task in tqdm(tasks, desc="Tasks loop", leave=True):
    spatial_effect_found[task] = {}
    topological_effect_found[task] = {}
    for sample_size in tqdm(sample_sizes, desc="Sample size loop", leave=False):
        for cdt in tqdm(cdts, desc="CDT loop", leave=False):
            key = f'N={sample_size},CDT={cdt}'
            spatial_effect_found[task][key] = []
            topological_effect_found[task][key] = []
            for repetition in tqdm(range(repetitions), desc="Repetition loop", leave=False):

                ground_truth = ground_truth_effect[task]

                uncorrected_tstat = loaded_maps['uncorrected_tstat'][task][key][repetition]

                # Signed agreement with the ground truth: positive where the
                # observed t-stat matches the true effect's sign; forced
                # non-positive wherever no true effect exists.
                true_sign = np.sign(ground_truth)
                classes = np.multiply(uncorrected_tstat, true_sign)
                no_effect = np.abs(ground_truth) < threshold
                classes[no_effect] = -np.abs(classes[no_effect])

                # Spatial method: clusters of vertices surviving correction.
                spatial_cluster_corrected_tstat = loaded_maps['spatial_cluster_corrected_tstat'][task][key][repetition]
                spatial_survived = (spatial_cluster_corrected_tstat > -np.log10(0.05)).astype(float)
                _, spatial_cluster_labels = get_all_cluster_extents(
                    spatial_survived, 0.5, local_adjacency, magnitude=False,
                )
                spatial_effect_found[task][key].append(
                    _cluster_contains_true_effect(classes, spatial_cluster_labels)
                )

                # Topological (TCS) method: same check on the hybrid graph.
                topological_cluster_corrected_tstat = loaded_maps['topological_cluster_corrected_tstat'][task][key][repetition]
                topological_survived = (topological_cluster_corrected_tstat > -np.log10(0.05)).astype(float)
                _, topological_cluster_labels = get_all_cluster_extents(
                    topological_survived, 0.5, hybrid_connectivity, magnitude=False,
                )
                topological_effect_found[task][key].append(
                    _cluster_contains_true_effect(classes, topological_cluster_labels)
                )
In [ ]:
fig, ax = plt.subplots(1, 1, figsize=(10, 10),)

# One color per sample size.
sample_colors = np.array(sns.color_palette("rainbow", len(sample_sizes)))

# NOTE(review): `task` is not set in this cell — it relies on the leftover
# value from the previous cell's loop (the last task iterated). Confirm this
# is the intended task before interpreting the plot.
for si, sample_size in enumerate(sample_sizes):
    # y: fraction of repetitions with at least one true-positive cluster,
    # among repetitions where any cluster survived correction.
    sns.lineplot(
        x=cdts,
        # y=[np.sum(topological_effect_found[task][f'N={sample_size},CDT={cdt}']) for cdt in cdts],
        y=[np.sum(topological_effect_found[task][f'N={sample_size},CDT={cdt}']) / (loaded_maps['topological_cluster_corrected_tstat'][task][f'N={sample_size},CDT={cdt}'].sum(1) > 0).sum() for cdt in cdts],
        style=True,
        color=np.append(sample_colors[si]/1., 1),
        legend=False,
        linewidth=2,
        dashes=[(2,2)],
        ax=ax,
    )
In [ ]: